RNA velocity¶

In [1]:
import scvelo as scv
import numpy as np
import pandas as pd
import scipy
import scanpy as sc

import os
In [2]:
sc.settings.vector_friendly = False
# scv.set_figure_params( dpi=300, dpi_save=300, frameon=False, figsize=(4, 4), format='png', fontsize=16)
scv.set_figure_params(figsize=(2, 5))
In [3]:
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

Parmameter¶

In [4]:
treatment = None
adata_file = 'data/object/scvelo_all.h5ad'

Import data¶

In [5]:
adata = sc.read_h5ad('data/object/velocyto.h5ad')
obs = pd.read_csv('data/object/int/meta/meta.csv', index_col=0)
obsm = pd.read_csv('data/object/int/reductions/X_umap/reduction.csv', index_col=0)

Filter velocity matrix and combine with obs and obsm from previous analysis.¶

In [6]:
population_names = ['Ery (1)', 'Ery (2)', 'Ery (3)', 'Ery (4)', 'Ery (5)', 'Ery (6)']
celltype_colours = [
    '#FC9272', 
    '#FB6A4A', 
    '#EF3B2C', 
    '#CB181D', 
    '#A50F15', 
    '#67000D'
]
In [7]:
# Filter obs by Ery annotation and treatment 
obs = obs[obs['leiden_annotation'].isin(population_names)]

if treatment is None: 
    obs = obs 
else: 
    obs = obs[obs['treatment'] == treatment]

# Filter obsm by cell index
obsm = obsm[obsm.index.isin(obs.index)]
In [8]:
# Filter velocity adata by obs 
adata = adata[adata.obs.index.isin(obs.index)]
In [9]:
# Order index to match velocity adata 
obs = obs.reindex(adata.obs.index)
obsm = obsm.reindex(adata.obs.index)

adata.obs = obs
adata.obsm['X_umap'] = obsm.to_numpy()
In [10]:
adata.uns['leiden_annotation_colors'] = celltype_colours
In [11]:
sc.pl.umap(adata, color=['leiden', 'tissue', 'treatment', 'leiden_annotation', 'sample_rep', 'cc_phase_class', 'pMt_RNA', 'pHb_RNA', 'pRb_RNA'], wspace=1, ncols=3)
... storing 'sample_name' as categorical
... storing 'sample_rep' as categorical
... storing 'tissue' as categorical
... storing 'treatment' as categorical
... storing 'sample_group' as categorical
... storing 'sample_path' as categorical
... storing 'sample_dir' as categorical
... storing 'cellranger_class' as categorical
... storing 'cc_phase_class' as categorical
... storing 'label_main_immgen' as categorical
... storing 'label_fine_immgen' as categorical
... storing 'label_main_haemosphere' as categorical
... storing 'label_fine_haemosphere' as categorical
... storing 'qc_class' as categorical
... storing 'doublet_cluster' as categorical
... storing 'doublet_class' as categorical
... storing 'doublet_class_int' as categorical
... storing 'leiden_annotation' as categorical
In [12]:
adata_temp = adata.copy()

Pre-processing¶

In [13]:
# HVG on all data 
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.05, min_disp=0.1)
hvg_1 = adata.var_names[adata.var.highly_variable].tolist()
In [14]:
# HVG on scvelo filtered data
adata = adata_temp.copy()
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)

hvg_2 = adata.var_names.tolist()
Filtered out 26253 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
In [15]:
hvg = list(set(hvg_1) | set(hvg_2))

Imputation¶

In [16]:
adata = adata_temp.copy()
adata = adata[:,hvg]
In [17]:
scv.pp.filter_and_normalize(adata)
scv.pp.moments(adata, n_neighbors=50)
Trying to set attribute `.obs` of view, copying.
Normalized count data: X, spliced, unspliced.
Logarithmized X.
computing neighbors
    finished (0:00:56) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:07) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)

Compute velocities¶

In [18]:
scv.tl.recover_dynamics(adata)
recovering dynamics (using 1/32 cores)
    finished (0:32:15) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)
In [19]:
scv.tl.velocity(adata,mode='dynamical')
scv.tl.velocity_graph(adata)
computing velocities
    finished (0:00:14) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/32 cores)
    finished (0:01:21) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
In [20]:
scv.tl.latent_time(adata)
computing terminal states
WARNING: Uncertain or fuzzy root cell identification. Please verify.
    identified 3 regions of root cells and 1 region of end points .
    finished (0:00:07) --> added
    'root_cells', root cells of Markov diffusion process (adata.obs)
    'end_points', end points of Markov diffusion process (adata.obs)
computing latent time using root_cells as prior
    finished (0:00:08) --> added 
    'latent_time', shared time (adata.obs)
In [21]:
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])
computing velocity embedding
    finished (0:00:08) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
In [22]:
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])

Save results¶

In [23]:
adata.write_h5ad(adata_file)